Setup

suppressPackageStartupMessages({
  ## Common
  library(tidyverse)
  library(magrittr)
  library(future.apply)
  library(here)
  library(AnnotationHub)
  library(purrr)
  library(scales)
  library(kableExtra)
  library(tictoc)
  library(ggrepel)
  library(RColorBrewer)
  library(ggpubr)
  library(pander)
  library(rmarkdown)
  ## Project specific
  library(edgeR)
  library(limma)
  library(pheatmap)
  library(cqn)
  library(DT)
  library(htmltools)
  library(msigdbr)
  library(reactable)
  library(fgsea)
})
if (interactive()) setwd(here::here())
theme_set(theme_bw())
cores <- availableCores() - 1
source("~/bioinformatics/bioToolkit/lbFuncs.R")

EnsDb

ens_species <- "Mus musculus"
ens_release <- "104"
ens_assembly <- "GRCm39"
ah <- AnnotationHub() %>%
  subset(species == ens_species) %>%
  subset(rdataclass == "EnsDb")
ahId <- ah$ah_id[str_detect(ah$title, ens_release)]
ensDb <- ah[[ahId]]
chrInfo <- getChromInfoFromEnsembl(ens_assembly, release = ens_release) %>%
  dplyr::filter(coord_system == "chromosome")
primary_chrs <- chrInfo$name
transcripts <- transcripts(ensDb, filter = SeqNameFilter(primary_chrs))
txLen <- exonsBy(ensDb, "tx", filter = SeqNameFilter(primary_chrs)) %>%
  width() %>%
  vapply(sum, integer(1))
mcols(transcripts) <- mcols(transcripts)[
  c("tx_id", "gene_id", "gc_content")
] %>%
  as.data.frame() %>%
  mutate(length = txLen)
geneGc <- transcripts %>%
  mcols() %>%
  as_tibble() %>%
  group_by(gene_id) %>%
  summarise(
    gc_content = sum(gc_content*length) / sum(length),
    length = ceiling(median(length))
  )
genes <- genes(ensDb, filter = SeqNameFilter(primary_chrs)) %>%
  .[,c("gene_id", "gene_name", "gene_biotype", "entrezid")] %>%
  as_tibble() %>%
  left_join(geneGc) %>%
  GRanges()

An EnsDb object was obtained for Ensembl release 101 with the AnnotationHub package. This contained the information required to set up gene and transcript annotations. Gene-level estimates of GC content and length were also generated which may be required as covariates in suitable analyses.

entrezGenes <- genes %>%
  as.data.frame() %>%
  dplyr::filter(!is.na(entrezid)) %>%
  mutate(entrezid = unAsIs(entrezid)) %>%
  unnest(entrezid) %>%
  dplyr::rename(entrez_gene = entrezid)
geneChrs <- as_tibble(genes) %>%
  dplyr::select(gene_id, chromosome = seqnames)

Metadata

metadata <- read_tsv(here("misc/SYNAPSE_METADATA_MANIFEST.tsv")) %>%
  left_join(read_csv(here("misc/metaAPOE.csv"))) %>%
  dplyr::select(
    sample = specimenID, species, genotypeBackground, litter, dateBirth,
    dateDeath, genotype = Genotype, sex = Sex, age = Age, lane, basename = name,
    modelSystemName, individualID, study
  ) %>%
  dplyr::filter(str_detect(sample, "_3M_")) %>%
  mutate(basename = str_remove(basename, ".bam_R(1|2).fastq.gz")) %>%
  distinct(sample, .keep_all = TRUE) %>%
  mutate(
    group = as.factor(paste0(genotype, "_", age, "_", sex)),
    genotype = as.factor(genotype)
  ) %>%
  dplyr::arrange(genotype, group)
genoCols <- metadata$genotype %>%
  unique() %>%
  length() %>%
  brewer.pal("Set1") %>%
  setNames(unique(metadata$genotype))
samples_byGroup <- metadata %>%
  split(f = .$group) %>%
  sapply(function(x){
    pull(x, sample)
  }, simplify = FALSE)

Differential expression data

dgeList <- readRDS(here("files/dgeList.Rds"))
design <- model.matrix(~0 + group, data = dgeList$samples) %>%
  set_colnames(str_remove(colnames(.), "group"))
contrasts <- makeContrasts(
  APOE2v3_female = APOE2_3M_female - APOE3_3M_female,
  APOE2v3_male = APOE2_3M_male - APOE3_3M_male,
  APOE4v3_female = APOE4_3M_female - APOE3_3M_female,
  APOE4v3_male = APOE4_3M_male - APOE3_3M_male,
  levels = colnames(design)
)
topTables <- readRDS(here("files/topTables_cqn.Rds"))
deGenes <- lapply(topTables, dplyr::filter, DE)

Gene sets

Multiple gene set databases were accessed with the msigdbr package. For this analysis the Hallmark, KEGG, Wikipathways and chromosomal based gene sets were chosen.

hm <- msigdbr(ens_species, category = "H") %>%
  left_join(entrezGenes) %>%
  dplyr::filter(!is.na(gene_id)) %>%
  distinct(gs_name, gene_id, .keep_all = TRUE) %>%
  mutate(gs_name = str_remove(gs_name, "^HALLMARK_"))
hmByGene <- hm %>%
  split(f = .$gene_id) %>%
  lapply(extract2, "gs_name")
hmByID <- hm %>%
  split(f = .$gs_name) %>%
  lapply(extract2, "gene_id")
  • The Hallmark gene sets consists of 50 gene sets ranging from sizes of 32 to 206 genes.
kg <- msigdbr(ens_species, category = "C2", subcategory = "CP:KEGG") %>%
  left_join(entrezGenes) %>%
  dplyr::filter(!is.na(gene_id)) %>%
  distinct(gs_name, gene_id, .keep_all = TRUE) %>%
  mutate(gs_name = str_remove(gs_name, "^KEGG_"))
kgByGene <- kg %>%
  split(f = .$gene_id) %>%
  lapply(extract2, "gs_name")
kgByID <- kg %>%
  split(f = .$gs_name) %>%
  lapply(extract2, "gene_id")
  • The KEGG gene sets consists of 186 gene sets ranging from sizes of 10 to 336 genes.
wk <- msigdbr(ens_species, category = "C2", subcategory = "CP:WIKIPATHWAYS") %>%
  left_join(entrezGenes) %>%
  dplyr::filter(!is.na(gene_id)) %>%
  distinct(gs_name, gene_id, .keep_all = TRUE) %>%
  mutate(gs_name = str_remove(gs_name, "^WP_"))
wkByGene <- wk %>%
  split(f = .$gene_id) %>%
  lapply(extract2, "gs_name")
wkByID <- wk %>%
  split(f = .$gs_name) %>%
  lapply(extract2, "gene_id")
  • The Wikipathways gene sets consists of 664 gene sets ranging from sizes of 2 to 435 genes.

Enrichment testing

Hallmark

hm_fry <- contrasts %>%
  apply(MARGIN = 2, function(contrast){
    cpm(dgeList$counts, log = TRUE) %>%
      fry(
        index = hmByID,
        design = design,
        contrast = contrast,
        sort = "mixed"
      ) %>%
      rownames_to_column("Pathway") %>%
      as_tibble() %>%
      mutate(
        Pathway = str_trunc(Pathway, width = 18),
        PValue = formatC(PValue, digits = 1, format = "e"),
        FDR = formatC(FDR, digits = 1, format = "e"),
        PValue.Mixed = formatC(PValue.Mixed, digits = 1, format = "e"),
        FDR.Mixed = formatC(FDR.Mixed, digits = 1, format = "e"),
      )
  })

APOE 2v3 females

datatable(hm_fry[[1]])

APOE 2v3 males

datatable(hm_fry[[2]])

APOE 4v3 females

datatable(hm_fry[[3]])

APOE 4v3 males

datatable(hm_fry[[4]])

KEGG

kg_fry <- contrasts %>%
  apply(MARGIN = 2, function(contrast){
    cpm(dgeList$counts, log = TRUE) %>%
      fry(
        index = kgByID,
        design = design,
        contrast = contrast,
        sort = "mixed"
      ) %>%
      rownames_to_column("Pathway") %>%
      as_tibble() %>%
      mutate(
        Pathway = str_trunc(Pathway, width = 18),
        PValue = formatC(PValue, digits = 1, format = "e"),
        FDR = formatC(FDR, digits = 1, format = "e"),
        PValue.Mixed = formatC(PValue.Mixed, digits = 1, format = "e"),
        FDR.Mixed = formatC(FDR.Mixed, digits = 1, format = "e"),
      )
  })

APOE 2v3 females

datatable(kg_fry[[1]])

APOE 2v3 males

datatable(kg_fry[[2]])

APOE 4v3 females

datatable(kg_fry[[3]])

APOE 4v3 males

datatable(kg_fry[[4]])

Wikipathways

wk_fry <- contrasts %>%
  apply(MARGIN = 2, function(contrast){
    cpm(dgeList$counts, log = TRUE) %>%
      fry(
        index = wkByID,
        design = design,
        contrast = contrast,
        sort = "mixed"
      ) %>%
      rownames_to_column("Pathway") %>%
      as_tibble() %>%
      mutate(
        Pathway = str_trunc(Pathway, width = 18),
        PValue = formatC(PValue, digits = 1, format = "e"),
        FDR = formatC(FDR, digits = 1, format = "e"),
        PValue.Mixed = formatC(PValue.Mixed, digits = 1, format = "e"),
        FDR.Mixed = formatC(FDR.Mixed, digits = 1, format = "e"),
      )
  })

APOE 2v3 females

datatable(wk_fry[[1]])

APOE 2v3 males

datatable(wk_fry[[2]])

APOE 4v3 females

datatable(wk_fry[[3]])

APOE 4v3 males

datatable(wk_fry[[4]])

eQTL bias removal

dist_winRanges <- readRDS(here("files/dist_winRanges.Rds"))
## Takes ~5 mins to run so .Rds saved for convenience
## Note a few genes are lost that do not fall within a diversity range
genes_diversity_path <- here("files/genes_diversity.Rds")
if (!file.exists(genes_diversity_path)) {
  genes_diversity <- lapply(dist_winRanges, function(winRanges){
    overlaps <- findOverlaps(GRanges(dgeList$genes), winRanges)
    lapply(unique(queryHits(overlaps)), function(query){
      subjects <- subjectHits(overlaps)[queryHits(overlaps) == query]
      diversity <- winRanges$diversity[subjects] %>%
        mean()
      dgeList$genes[query,] %>%
        mutate(diversity = diversity)
    }) %>%
      bind_rows() %>%
      GRanges()
  })
  saveRDS(genes_diversity, genes_diversity_path)
} else {
  genes_diversity <- readRDS(genes_diversity_path)
}
thresholds <- seq(0.1, 1.0, 0.1)
threshOutcomes <- sapply(colnames(contrasts), function(cont){
  topT_div <- as_tibble(genes_diversity[[cont]]) %>%
    dplyr::select(gene_id, diversity) %>%
    left_join(topTables[[cont]])
  n_DE <- topT_div %>%
    dplyr::filter(DE) %>%
    nrow()
  n_notDE <- topT_div %>%
    dplyr::filter(!DE) %>%
    nrow()
  lapply(thresholds, function(threshVal){
    topT_div %>%
      dplyr::filter(diversity > threshVal) %>%
      summarise(
        DE_count = sum(DE),
        DE_prop = DE_count / n_DE,
        notDE_count = sum(!DE),
        notDE_prop = notDE_count / n_notDE
      ) %>%
      mutate(threshold = threshVal)
  }) %>%
    bind_rows()
}, simplify = FALSE)
threshPlots_de <- lapply(threshOutcomes, function(x){
  x %>%
    pivot_longer(
      cols = c(DE_count, DE_prop, notDE_count, notDE_prop),
      names_to = c("sig", "type"),
      names_sep = "_"
    ) %>%
    pivot_wider(
      names_from = "type",
      values_from = "value"
    ) %>%
    ggplot(aes(x = threshold, y = prop, group = sig, fill = sig)) +
    geom_bar(position = "dodge", stat = "identity", width = 0.08) +
    geom_text(
      aes(x = threshold, y = prop, label = count),
      size = 3,
      nudge_y = 0.03,
      nudge_x = c(-0.019, 0.019)
    ) +
    scale_x_continuous(breaks = thresholds) +
    scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)) +
    scale_fill_manual(values = c("grey", "black"), labels = c("DE", "Not DE")) +
    theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
    coord_cartesian(clip = "off", xlim = c(0.1,1)) +
    labs(y = "Proportion of\ngenes removed", x = "Diversity threshold") +
    theme(
      legend.position = c(1.05, 0.5),
      legend.title = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.border = element_blank(),
    )
})

Fry

Hallmark

hm_fry_eqtl <- colnames(contrasts) %>%
  sapply(function(cont){
    lapply(thresholds, function(threshVal){
      genes_passed <- genes_diversity[[cont]] %>%
        as_tibble() %>%
        dplyr::filter(diversity <= threshVal) %>%
        pull(gene_id)
      cpm(
        dgeList$counts[genes_passed,],
        log = TRUE
      ) %>%
        fry(
          index = hmByID,
          design = design,
          contrast = contrasts[,cont],
          sort = "mixed"
        ) %>%
        rownames_to_column("Pathway") %>%
        as_tibble() %>%
        mutate(
          diversity_threshold = threshVal,
          rank = row_number(),
          sig = factor(
            ifelse(FDR.Mixed < 0.05, "DE", "Not DE"),
            levels = c("DE", "Not DE")
          )
        )
    })
  }, simplify = FALSE)
hm_fry_eqtl_top <- sapply(hm_fry_eqtl, function(contrast){
  lapply(contrast, function(threshold){
    threshold[1:20,] %>%
      pull(Pathway)
  }) %>%
    unlist() %>%
    unique()
}, simplify = FALSE)
rankPlots_hm_fry <- sapply(colnames(contrasts), function(cont){
  # plotly::ggplotly({
  lapply(hm_fry_eqtl[[cont]], function(threshold){
    threshold %>%
      dplyr::filter(Pathway %in% hm_fry_eqtl_top[[cont]]) %>%
      mutate(rel_rank = row_number())
  }) %>%
    bind_rows() %>%
    mutate(
      label = ifelse(
        diversity_threshold == 1,
        str_trunc(Pathway, width = 30),
        NA
      )
    ) %>%
    ggplot(aes(
      diversity_threshold,
      rel_rank,
      # -log10(FDR.Mixed),
      group = Pathway,
      colour = Pathway,
      text = sprintf(
        paste0(
          "Pathway: %s<br>",
          "Rank: %i<br>",
          "Relative rank: %i<br>",
          "FDR (mixed): %e<br>",
          "Number of genes: %i<br>",
          "Direction: %s"
        ),
        Pathway, rank, rel_rank, FDR.Mixed, NGenes, Direction
      )
    )) +
    geom_line() +
    geom_point(aes(shape = sig), size = 4) +
    geom_text(aes(label = rank), colour = "black", size = 2.1) +
    geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
    scale_y_reverse(breaks = seq(1, length(hm_fry_eqtl_top[[cont]]), 1)) +
    scale_x_continuous(breaks = thresholds) +
    scale_shape_manual(values = c(17, 19)) +
    labs(x = "Diversity score cut-off", y = "\nRelative rank") +
    theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
    coord_cartesian(clip = "off") +
    theme(
      legend.position = "none",
      panel.grid.minor.y = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.border = element_blank(),
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
  # }, tooltip = "text")
}, simplify = FALSE)

APOE2v3_female

ggarrange(
  rankPlots_hm_fry$APOE2v3_female,
  NULL,
  threshPlots_de$APOE2v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE2v3_male

ggarrange(
  rankPlots_hm_fry$APOE2v3_male,
  NULL,
  threshPlots_de$APOE2v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_female

ggarrange(
  rankPlots_hm_fry$APOE4v3_female,
  NULL,
  threshPlots_de$APOE4v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_male

ggarrange(
  rankPlots_hm_fry$APOE4v3_male,
  NULL,
  threshPlots_de$APOE4v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

KEGG

kg_fry_eqtl <- colnames(contrasts) %>%
  sapply(function(cont){
    lapply(thresholds, function(threshVal){
      genes_passed <- genes_diversity[[cont]] %>%
        as_tibble() %>%
        dplyr::filter(diversity <= threshVal) %>%
        pull(gene_id)
      cpm(
        dgeList$counts[genes_passed,],
        log = TRUE
      ) %>%
        fry(
          index = kgByID,
          design = design,
          contrast = contrasts[,cont],
          sort = "mixed"
        ) %>%
        rownames_to_column("Pathway") %>%
        as_tibble() %>%
        mutate(
          diversity_threshold = threshVal,
          rank = row_number(),
          sig = factor(
            ifelse(FDR.Mixed < 0.05, "DE", "Not DE"),
            levels = c("DE", "Not DE")
          )
        )
    })
  }, simplify = FALSE)
# plotly::ggplotly({
#   kg_fry_eqtl$APOE4v3_female %>%
#     bind_rows() %>%
#     ggplot(aes(diversity_threshold, rank, group = Pathway, colour = Pathway)) +
#     geom_line() +
#     geom_point() +
#     theme(legend.position = "none") +
#     scale_y_reverse(breaks = c(1, seq(10, 180, 10))) +
#     scale_x_continuous(breaks = seq(0, 1, 0.1)) +
#     labs()
# })
kg_fry_eqtl_top <- sapply(kg_fry_eqtl, function(contrast){
  lapply(contrast, function(threshold){
    threshold[1:20,] %>%
      pull(Pathway)
  }) %>%
    unlist() %>%
    unique()
}, simplify = FALSE)
rankPlots_kg_fry <- sapply(colnames(contrasts), function(cont){
  # plotly::ggplotly({
  lapply(kg_fry_eqtl[[cont]], function(threshold){
    threshold %>%
      dplyr::filter(Pathway %in% kg_fry_eqtl_top[[cont]]) %>%
      mutate(rel_rank = row_number())
  }) %>%
    bind_rows() %>%
    mutate(
      label = ifelse(
        diversity_threshold == 1,
        str_trunc(Pathway, width = 30),
        NA
      )
    ) %>%
    ggplot(aes(
      diversity_threshold,
      rel_rank,
      # -log10(FDR.Mixed),
      group = Pathway,
      colour = Pathway,
      text = sprintf(
        paste0(
          "Pathway: %s<br>",
          "Rank: %i<br>",
          "Relative rank: %i<br>",
          "FDR (mixed): %e<br>",
          "Number of genes: %i<br>",
          "Direction: %s"
        ),
        Pathway, rank, rel_rank, FDR.Mixed, NGenes, Direction
      )
    )) +
    geom_line() +
    geom_point(aes(shape = sig), size = 4) +
    geom_text(aes(label = rank), colour = "black", size = 2.1) +
    geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
    scale_y_reverse(breaks = seq(1, length(kg_fry_eqtl_top[[cont]]), 1)) +
    scale_x_continuous(breaks = thresholds) +
    scale_shape_manual(values = c(17, 19)) +
    labs(x = "Diversity score cut-off", y = "\nRelative rank") +
    theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
    coord_cartesian(clip = "off") +
    theme(
      legend.position = "none",
      panel.grid.minor.y = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.border = element_blank(),
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
  # }, tooltip = "text")
}, simplify = FALSE)

APOE2v3_female

ggarrange(
  rankPlots_kg_fry$APOE2v3_female,
  NULL,
  threshPlots_de$APOE2v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE2v3_male

ggarrange(
  rankPlots_kg_fry$APOE2v3_male,
  NULL,
  threshPlots_de$APOE2v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_female

ggarrange(
  rankPlots_kg_fry$APOE4v3_female,
  NULL,
  threshPlots_de$APOE4v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_male

ggarrange(
  rankPlots_kg_fry$APOE4v3_male,
  NULL,
  threshPlots_de$APOE4v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

Wikipathways

wk_fry_eqtl <- colnames(contrasts) %>%
  sapply(function(cont){
    lapply(thresholds, function(threshVal){
      genes_passed <- genes_diversity[[cont]] %>%
        as_tibble() %>%
        dplyr::filter(diversity <= threshVal) %>%
        pull(gene_id)
      cpm(
        dgeList$counts[genes_passed,],
        log = TRUE
      ) %>%
        fry(
          index = wkByID,
          design = design,
          contrast = contrasts[,cont],
          sort = "mixed"
        ) %>%
        rownames_to_column("Pathway") %>%
        as_tibble() %>%
        mutate(
          diversity_threshold = threshVal,
          rank = row_number(),
          sig = factor(
            ifelse(FDR.Mixed < 0.05, "DE", "Not DE"),
            levels = c("DE", "Not DE")
          )
        )
    })
  }, simplify = FALSE)
wk_fry_eqtl_top <- sapply(wk_fry_eqtl, function(contrast){
  lapply(contrast, function(threshold){
    threshold[1:20,] %>%
      pull(Pathway)
  }) %>%
    unlist() %>%
    unique()
}, simplify = FALSE)
rankPlots_wk_fry <- sapply(colnames(contrasts), function(cont){
  # plotly::ggplotly({
  lapply(wk_fry_eqtl[[cont]], function(threshold){
    threshold %>%
      dplyr::filter(Pathway %in% wk_fry_eqtl_top[[cont]]) %>%
      mutate(rel_rank = row_number())
  }) %>%
    bind_rows() %>%
    mutate(
      label = ifelse(
        diversity_threshold == 1,
        str_trunc(Pathway, width = 30),
        NA
      )
    ) %>%
    ggplot(aes(
      diversity_threshold,
      rel_rank,
      # -log10(FDR.Mixed),
      group = Pathway,
      colour = Pathway,
      text = sprintf(
        paste0(
          "Pathway: %s<br>",
          "Rank: %i<br>",
          "Relative rank: %i<br>",
          "FDR (mixed): %e<br>",
          "Number of genes: %i<br>",
          "Direction: %s"
        ),
        Pathway, rank, rel_rank, FDR.Mixed, NGenes, Direction
      )
    )) +
    geom_line() +
    geom_point(aes(shape = sig), size = 4) +
    geom_text(aes(label = rank), colour = "black", size = 2.1) +
    geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
    scale_y_reverse(breaks = seq(1, length(wk_fry_eqtl_top[[cont]]), 1)) +
    scale_x_continuous(breaks = thresholds) +
    scale_shape_manual(values = c(17, 19)) +
    labs(x = "Diversity score cut-off", y = "\nRelative rank") +
    theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
    coord_cartesian(clip = "off") +
    theme(
      legend.position = "none",
      panel.grid.minor.y = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.border = element_blank(),
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
  # }, tooltip = "text")
}, simplify = FALSE)

APOE2v3_female

ggarrange(
  rankPlots_wk_fry$APOE2v3_female,
  NULL,
  threshPlots_de$APOE2v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE2v3_male

ggarrange(
  rankPlots_wk_fry$APOE2v3_male,
  NULL,
  threshPlots_de$APOE2v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_female

ggarrange(
  rankPlots_wk_fry$APOE4v3_female,
  NULL,
  threshPlots_de$APOE4v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_male

ggarrange(
  rankPlots_wk_fry$APOE4v3_male,
  NULL,
  threshPlots_de$APOE4v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

GSEA

ranks_d <- lapply(topTables, function(x){
  x %>%
    mutate(stat = -log10(PValue) * sign(logFC)) %>%
    dplyr::arrange(desc(stat)) %>%
    with(structure(stat, names = gene_id))
})
ranks_nd <- lapply(topTables, function(x){
  x %>%
    mutate(stat = -log10(PValue)) %>%
    dplyr::arrange(desc(stat)) %>%
    with(structure(stat, names = gene_id))
})

Hallmark

hm_gsea_eqtl <- colnames(contrasts) %>%
  sapply(function(cont){
    lapply(thresholds, function(threshVal){
      genes_passed <- genes_diversity[[cont]] %>%
        as_tibble() %>%
        dplyr::filter(diversity <= threshVal) %>%
        pull(gene_id)
      fgseaMultilevel(hmByID, ranks_d[[cont]][genes_passed], eps = 0) %>%
        as_tibble() %>%
        dplyr::rename(FDR = padj, Pathway = pathway) %>%
        dplyr::arrange(pval) %>%
        mutate(
          edgeSize = vapply(leadingEdge, length, numeric(1)),
          Direction = case_when(
            sign(ES) == 1 ~ "Up",
            sign(ES) == -1 ~ "Down",
            sign(ES) == 0 ~ "None"
          ),
          diversity_threshold = threshVal,
          rank = row_number(),
          bonf.p.adj = p.adjust(pval, "bonferroni"),
          sig = factor(
            ifelse(bonf.p.adj < 0.05, "DE", "Not DE"),
            levels = c("DE", "Not DE")
          )
        )
    })
  }, simplify = FALSE)
hm_gsea_eqtl_top <- sapply(hm_gsea_eqtl, function(contrast){
  lapply(contrast, function(threshold){
    threshold[1:20,] %>%
      pull(Pathway)
  }) %>%
    unlist() %>%
    unique()
}, simplify = FALSE)
rankPlots_hm_gsea <- sapply(colnames(contrasts), function(cont){
  # plotly::ggplotly({
  lapply(hm_gsea_eqtl[[cont]], function(threshold){
    threshold %>%
      dplyr::filter(Pathway %in% hm_gsea_eqtl_top[[cont]]) %>%
      mutate(rel_rank = row_number())
  }) %>%
    bind_rows() %>%
    mutate(
      label = ifelse(
        diversity_threshold == 1,
        str_trunc(Pathway, width = 30),
        NA
      )
    ) %>%
    ggplot(aes(
      diversity_threshold,
      rel_rank,
      # -log10(FDR.Mixed),
      group = Pathway,
      colour = Pathway,
      text = sprintf(
        paste0(
          "Pathway: %s<br>",
          "Rank: %i<br>",
          "Relative rank: %i<br>",
          "FDR: %e<br>",
          "Leading edge size: %i<br>",
          "Direction: %s"
        ),
        Pathway, rank, rel_rank, FDR, edgeSize, Direction
      )
    )) +
    geom_line() +
    geom_point(aes(shape = sig), size = 4) +
    geom_text(aes(label = rank), colour = "black", size = 2.1) +
    geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
    scale_y_reverse(breaks = seq(1, length(hm_gsea_eqtl_top[[cont]]), 1)) +
    scale_x_continuous(breaks = thresholds) +
    scale_shape_manual(values = c(17, 19)) +
    labs(x = "Diversity score cut-off", y = "\nRelative rank") +
    theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
    coord_cartesian(clip = "off") +
    theme(
      legend.position = "none",
      panel.grid.minor.y = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.border = element_blank(),
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
  # }, tooltip = "text")
}, simplify = FALSE)

APOE2v3_female

ggarrange(
  rankPlots_hm_gsea$APOE2v3_female,
  NULL,
  threshPlots_de$APOE2v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE2v3_male

ggarrange(
  rankPlots_hm_gsea$APOE2v3_male,
  NULL,
  threshPlots_de$APOE2v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_female

ggarrange(
  rankPlots_hm_gsea$APOE4v3_female,
  NULL,
  threshPlots_de$APOE4v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_male

ggarrange(
  rankPlots_hm_gsea$APOE4v3_male,
  NULL,
  threshPlots_de$APOE4v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

KEGG

kg_gsea_eqtl <- colnames(contrasts) %>%
  sapply(function(cont){
    lapply(thresholds, function(threshVal){
      genes_passed <- genes_diversity[[cont]] %>%
        as_tibble() %>%
        dplyr::filter(diversity <= threshVal) %>%
        pull(gene_id)
      fgseaMultilevel(kgByID, ranks_d[[cont]][genes_passed], eps = 0) %>%
        as_tibble() %>%
        dplyr::rename(FDR = padj, Pathway = pathway) %>%
        dplyr::arrange(pval) %>%
        mutate(
          edgeSize = vapply(leadingEdge, length, numeric(1)),
          Direction = case_when(
            sign(ES) == 1 ~ "Up",
            sign(ES) == -1 ~ "Down",
            sign(ES) == 0 ~ "None"
          ),
          diversity_threshold = threshVal,
          rank = row_number(),
          bonf.p.adj = p.adjust(pval, "bonferroni"),
          sig = factor(
            ifelse(bonf.p.adj < 0.05, "DE", "Not DE"),
            levels = c("DE", "Not DE")
          )
        )
    })
  }, simplify = FALSE)
kg_gsea_eqtl_top <- sapply(kg_gsea_eqtl, function(contrast){
  lapply(contrast, function(threshold){
    threshold[1:20,] %>%
      pull(Pathway)
  }) %>%
    unlist() %>%
    unique()
}, simplify = FALSE)
rankPlots_kg_gsea <- sapply(colnames(contrasts), function(cont){
  # plotly::ggplotly({
  lapply(kg_gsea_eqtl[[cont]], function(threshold){
    threshold %>%
      dplyr::filter(Pathway %in% kg_gsea_eqtl_top[[cont]]) %>%
      mutate(rel_rank = row_number())
  }) %>%
    bind_rows() %>%
    mutate(
      label = ifelse(
        diversity_threshold == 1,
        str_trunc(Pathway, width = 30),
        NA
      )
    ) %>%
    ggplot(aes(
      diversity_threshold,
      rel_rank,
      # -log10(FDR.Mixed),
      group = Pathway,
      colour = Pathway,
      text = sprintf(
        paste0(
          "Pathway: %s<br>",
          "Rank: %i<br>",
          "Relative rank: %i<br>",
          "FDR: %e<br>",
          "Leading edge size: %i<br>",
          "Direction: %s"
        ),
        Pathway, rank, rel_rank, FDR, edgeSize, Direction
      )
    )) +
    geom_line() +
    geom_point(aes(shape = sig), size = 4) +
    geom_text(aes(label = rank), colour = "black", size = 2.1) +
    geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
    scale_y_reverse(breaks = seq(1, length(kg_gsea_eqtl_top[[cont]]), 1)) +
    scale_x_continuous(breaks = thresholds) +
    scale_shape_manual(values = c(17, 19)) +
    labs(x = "Diversity score cut-off", y = "\nRelative rank") +
    theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
    coord_cartesian(clip = "off") +
    theme(
      legend.position = "none",
      panel.grid.minor.y = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.border = element_blank(),
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
  # }, tooltip = "text")
}, simplify = FALSE)

APOE2v3_female

ggarrange(
  rankPlots_kg_gsea$APOE2v3_female,
  NULL,
  threshPlots_de$APOE2v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE2v3_male

ggarrange(
  rankPlots_kg_gsea$APOE2v3_male,
  NULL,
  threshPlots_de$APOE2v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_female

ggarrange(
  rankPlots_kg_gsea$APOE4v3_female,
  NULL,
  threshPlots_de$APOE4v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_male

ggarrange(
  rankPlots_kg_gsea$APOE4v3_male,
  NULL,
  threshPlots_de$APOE4v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

Wikipathways

wk_gsea_eqtl <- colnames(contrasts) %>%
  sapply(function(cont){
    lapply(thresholds, function(threshVal){
      genes_passed <- genes_diversity[[cont]] %>%
        as_tibble() %>%
        dplyr::filter(diversity <= threshVal) %>%
        pull(gene_id)
      fgseaMultilevel(wkByID, ranks_d[[cont]][genes_passed], eps = 0) %>%
        as_tibble() %>%
        dplyr::rename(FDR = padj, Pathway = pathway) %>%
        dplyr::arrange(pval) %>%
        mutate(
          edgeSize = vapply(leadingEdge, length, numeric(1)),
          Direction = case_when(
            sign(ES) == 1 ~ "Up",
            sign(ES) == -1 ~ "Down",
            sign(ES) == 0 ~ "None"
          ),
          diversity_threshold = threshVal,
          rank = row_number(),
          bonf.p.adj = p.adjust(pval, "bonferroni"),
          sig = factor(
            ifelse(bonf.p.adj < 0.05, "DE", "Not DE"),
            levels = c("DE", "Not DE")
          )
        )
    })
  }, simplify = FALSE)
wk_gsea_eqtl_top <- sapply(wk_gsea_eqtl, function(contrast){
  lapply(contrast, function(threshold){
    threshold[1:20,] %>%
      pull(Pathway)
  }) %>%
    unlist() %>%
    unique()
}, simplify = FALSE)
rankPlots_wk_gsea <- sapply(colnames(contrasts), function(cont){
  # plotly::ggplotly({
  lapply(wk_gsea_eqtl[[cont]], function(threshold){
    threshold %>%
      dplyr::filter(Pathway %in% wk_gsea_eqtl_top[[cont]]) %>%
      mutate(rel_rank = row_number())
  }) %>%
    bind_rows() %>%
    mutate(
      label = ifelse(
        diversity_threshold == 1,
        str_trunc(Pathway, width = 30),
        NA
      )
    ) %>%
    ggplot(aes(
      diversity_threshold,
      rel_rank,
      # -log10(FDR.Mixed),
      group = Pathway,
      colour = Pathway,
      text = sprintf(
        paste0(
          "Pathway: %s<br>",
          "Rank: %i<br>",
          "Relative rank: %i<br>",
          "FDR: %e<br>",
          "Leading edge size: %i<br>",
          "Direction: %s"
        ),
        Pathway, rank, rel_rank, FDR, edgeSize, Direction
      )
    )) +
    geom_line() +
    geom_point(aes(shape = sig), size = 4) +
    geom_text(aes(label = rank), colour = "black", size = 2.1) +
    geom_text(aes(x = 1.01, label = label), size = 3.2, hjust = 0, na.rm = TRUE) +
    scale_y_reverse(breaks = seq(1, length(wk_gsea_eqtl_top[[cont]]), 1)) +
    scale_x_continuous(breaks = thresholds) +
    scale_shape_manual(values = c(17, 19)) +
    labs(x = "Diversity score cut-off", y = "\nRelative rank") +
    theme(plot.margin = margin(0, 5, 0, 0, unit = "cm")) +
    coord_cartesian(clip = "off") +
    theme(
      legend.position = "none",
      panel.grid.minor.y = element_blank(),
      panel.grid.major.y = element_blank(),
      panel.grid.minor.x = element_blank(),
      panel.grid.major.x = element_blank(),
      panel.border = element_blank(),
      axis.title.x = element_blank(),
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    )
  # }, tooltip = "text")
}, simplify = FALSE)

APOE2v3_female

ggarrange(
  rankPlots_wk_gsea$APOE2v3_female,
  NULL,
  threshPlots_de$APOE2v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE2v3_male

ggarrange(
  rankPlots_wk_gsea$APOE2v3_male,
  NULL,
  threshPlots_de$APOE2v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_female

ggarrange(
  rankPlots_wk_gsea$APOE4v3_female,
  NULL,
  threshPlots_de$APOE4v3_female,
  ncol = 1,
  heights = c(4,0.2,1)
)

APOE4v3_male

ggarrange(
  rankPlots_wk_gsea$APOE4v3_male,
  NULL,
  threshPlots_de$APOE4v3_male,
  ncol = 1,
  heights = c(4,0.2,1)
)